# -*- coding: utf-8 -*-
'''
Created on Feb 16, 2013

@author: LONG HOANG GIANG
'''

from CrawlerLib2 import commonlib, html2text
from Model.storymodelv3 import StoryModel
import datetime, re


URL = 'http://linhmaroon.wordpress.com/luoi-phai-yeu-anh/'

def getChapters():
    
    tree = commonlib.loadweb(URL).build_tree()
    data = []
    for node in tree.xpath("//p[@style='text-align:center;']//a[contains(.,'PART')]".decode('utf-8')):
        
        title = commonlib.stringify(node).strip()
        href = node.get('href', '')
        if href == '': continue
        print title, href
        data.append({'title': title, 'url': href})
    return data

def getDetail(item):
    
    url = item['url']
    tree = commonlib.loadweb(url).build_tree() 
    detailNode = tree.xpath("//div[@class='entry-content clear-fix']")[0]
    commonlib.Etree.clean_following_sibling(detailNode.xpath("//div[@id='jp-post-flair']"), True)
    commonlib.Etree.clean(detailNode.xpath("//a[contains(., 'About these ads')]"))
    txt = html2text.html2text(commonlib.Etree.tostring(detailNode)).encode('utf-8')
    txt = txt.replace("About these ads", "")
    print txt
    txt = re.sub(r"\n", "<br />", txt)
    txt = '''<strong>{0}</strong><br /><hr /><br />{1}'''.format(item['title'], txt)
    return txt
                
def process():
    '''
    Crawl every chapter and store it in the story database.

    Fetches the chapter list with getChapters(), opens the StoryModel at
    '/longhoanggiang/database/lpya', and for each chapter stores the title
    (with 'PART' replaced by 'CHƯƠNG') together with the HTML returned by
    getDetail().

    The model is closed even when a download or insert raises; the
    exception itself still propagates to the caller.
    '''
    data = getChapters()
    d = StoryModel('/longhoanggiang/database/lpya')
    # NOTE(review): meaning of the True flag is defined by StoryModel.open - confirm.
    d.open(True)
    try:
        for chapter in data:
            title = chapter['title'].replace('PART', 'CHƯƠNG')
            detail = getDetail(chapter)
            # NOTE(review): third argument 0 is passed through as-is; its
            # meaning is defined by StoryModel.add_story - confirm.
            d.add_story(title, detail, 0)
    finally:
        # Release the database handle even if a chapter fails part-way.
        d.close()
                
            
if __name__ == '__main__':
    
#    getChapters()
#    getDetail({'title': '', 'url': 'http://linhmaroon.wordpress.com/2012/06/02/part-1-luoi-phai-yeu-anh/'})
    process()
    
    
    print '> Finished at {0}'.format(datetime.datetime.now())        